Attribute Information:
Date Time: timestamp at ten-minute intervals. Temperature: weather temperature of Tetouan city. Humidity: weather humidity of Tetouan city. Wind Speed: wind speed of Tetouan city. General Diffuse Flows and Diffuse Flows: solar-radiation measurements. Zone 1 / Zone 2 / Zone 3 Power Consumption: power consumption of zones 1, 2, and 3 of Tetouan city.
# Package setup.
# NOTE(review): removed `rm(list = ls())` — clearing the global workspace
# inside a script is an anti-pattern; run the script in a fresh session
# instead. Duplicate loads were also dropped: tidyverse already attaches
# dplyr (and ggplot2, tidyr, purrr, ...), and xts attaches zoo.
library(tidyverse)   # ggplot2, dplyr, tidyr, readr, purrr, tibble, ...
library(data.table)  # data.table(), melt(); masks dplyr between/first/last
library(lubridate)   # year(), month(), week(), day(), hour(), minute()
library(forecast)    # stlf(), auto.arima(), forecast(), autoplot()
library(xts)         # xts objects with date-range subsetting (attaches zoo)
library(Metrics)     # smape(); masks forecast::accuracy
# Read the Tetouan power-consumption data (10-minute observations).
df <- read.csv("Tetuan City power consumption.csv")
colnames(df)

# Rename the unwieldy zone columns BEFORE referencing them.
# BUG FIX: the original ran sum(is.na(df$Zone1)) before this rename, while
# column 7 was still named "Zone.1.Power.Consumption" — df$Zone1 was NULL,
# so the check trivially returned 0 regardless of actual missing values.
colnames(df)[7] <- "Zone1"
colnames(df)[8] <- "Zone2"
colnames(df)[9] <- "Zone3"

# Check for missing values in the target series (now that the name exists).
sum(is.na(df$Zone1))
# Exploratory plots: time-series plot and ACF for every column but DateTime.
# (Renamed `var` -> `vars`: `var` shadows stats::var; `<-` for assignment.)
vars <- colnames(df)[-1]
for (v in vars) {
  ts.plot(df[[v]], ylab = v)
  acf(df[[v]], ylab = paste0(v, " ACF"), main = "")
}

# Stationarity diagnostics for each series.
library(tseries)
for (v in vars) {
  print(adf.test(df[[v]]))                   # H0: unit root (non-stationary)
  print(kpss.test(df[[v]], null = "Trend"))  # H0: trend-stationary
}
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df[[i]]
## Dickey-Fuller = -13.207, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: df[[i]]
## KPSS Trend = 42.545, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df[[i]]
## Dickey-Fuller = -20.358, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: df[[i]]
## KPSS Trend = 2.1044, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df[[i]]
## Dickey-Fuller = -7.717, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: df[[i]]
## KPSS Trend = 16.004, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df[[i]]
## Dickey-Fuller = -35.446, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: df[[i]]
## KPSS Trend = 4.9499, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df[[i]]
## Dickey-Fuller = -32.848, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: df[[i]]
## KPSS Trend = 1.3615, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df[[i]]
## Dickey-Fuller = -32.63, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: df[[i]]
## KPSS Trend = 6.6416, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df[[i]]
## Dickey-Fuller = -31.192, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: df[[i]]
## KPSS Trend = 2.089, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df[[i]]
## Dickey-Fuller = -19.704, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: df[[i]]
## KPSS Trend = 23.651, Truncation lag parameter = 19, p-value = 0.01
# Rebuild DateTime as a regular 10-minute POSIXct sequence covering the
# whole sample (avoids parsing ambiguities in the raw strings).
date <- seq(
  from = as.POSIXct("2017-01-01 00:00", format = "%Y-%m-%d %H:%M"),
  length.out = nrow(df),
  by = "10 min"
)
df$DateTime <- date

# Calendar features derived from the timestamp (lubridate accessors).
df$year   <- year(df$DateTime)
df$month  <- month(df$DateTime)
df$week   <- week(df$DateTime)
df$day    <- day(df$DateTime)
df$hour   <- hour(df$DateTime)
df$minute <- minute(df$DateTime)

# Zone 1 consumption as an xts series; reference the column by name rather
# than by position (df[, 7]) so the code survives column reordering.
df_xts <- xts(df[, "Zone1"], date)

# Train/test splits at three horizons via xts date-range subsetting.
train_daily <- df_xts['2017-01-01/2017-12-29']
test_daily  <- df_xts['2017-12-30']
plot(train_daily, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")
plot(test_daily,  xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")

train_weekly <- df_xts['2017-01-01/2017-12-23']
test_weekly  <- df_xts['2017-12-24/2017-12-30']
plot(train_weekly, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")
plot(test_weekly,  xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")

train_monthly <- df_xts['2017-01-01/2017-11-30']
test_monthly  <- df_xts['2017-12-01/2017-12-30']
plot(train_monthly, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")
plot(test_monthly,  xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")

# Daily seasonality: 6 obs/hour * 24 hours = 144 observations per period.
# BUG FIX: the original decomposed train_weekly here despite naming the
# result comp_daily; the daily decomposition should use train_daily.
comp_daily <- decompose(ts(train_daily, frequency = 6 * 24))
plot(comp_daily)
fit_daily <- stlf(ts(train_daily, frequency = 6 * 24))
fit_daily$model
## ETS(A,N,N)
##
## Call:
## ets(y = na.interp(x), model = etsmodel, allow.multiplicative.trend = allow.multiplicative.trend)
##
## Smoothing parameters:
## alpha = 0.9989
##
## Initial states:
## l = 35146.4894
##
## sigma: 278.7779
##
## AIC AICc BIC
## 1156524 1156524 1156551
# Forecast one day ahead with the daily-seasonal STL+ETS fit, then overlay
# the predictions on the held-out observations.
pred_daily <- forecast(fit_daily, h = nrow(test_daily))
autoplot(pred_daily)

pred_df_daily <- data.table(
  time     = index(test_daily),
  test_val = test_daily[, 1],
  pred_val = pred_daily$mean
)
d <- melt(pred_df_daily, id.vars = "time")
p <- ggplot(d, aes(x = time, y = value, color = variable)) +
  geom_point(size = 1) +
  geom_line()
p + labs(title = "Zone 1 Power Consumption: daily",
         x = "Time", y = "Watt Hours")
# Weekly seasonality on 10-minute data: 6 * 24 * 7 = 1008 observations per
# period. BUG FIX: the original used frequency = 6*24*52 (= 8736), which
# matches no calendar cycle in this data — 52 is weeks-per-year, not
# days-per-week, so the "weekly" seasonal period was ~60.7 days long.
comp_weekly <- decompose(ts(train_weekly, frequency = 6 * 24 * 7))
plot(comp_weekly)

fit_weekly <- stlf(ts(train_weekly, frequency = 6 * 24 * 7))
fit_weekly$model

# Forecast the held-out final week and plot predictions vs. actuals.
pred_weekly <- forecast(fit_weekly, h = nrow(test_weekly))
autoplot(pred_weekly)
pred_df_weekly <- data.table(
  time     = index(test_weekly),
  test_val = test_weekly[, 1],
  pred_val = pred_weekly$mean
)
d <- melt(pred_df_weekly, id.vars = "time")
p <- ggplot(d, aes(x = time, y = value, color = variable)) +
  geom_point(size = 1) +
  geom_line()
p + labs(title = "Zone 1 Power Consumption: weekly",
         x = "Time", y = "Watt Hours")
# Monthly seasonality: 6 * 24 * 30 = 4320 observations per (30-day) period.
comp_monthly <- decompose(ts(train_monthly, frequency = 6 * 24 * 30))
plot(comp_monthly)

fit_monthly <- stlf(ts(train_monthly, frequency = 6 * 24 * 30))
fit_monthly$model

# BUG FIX: the original called forecast(fit_weekly, h = nrow(test_weekly))
# here, producing 1008 weekly-model predictions that data.table() silently
# recycled against the 4320-row monthly test set (the "recycled with
# remainder" warning in the original run). Forecast the MONTHLY fit over
# the MONTHLY test horizon.
pred_monthly <- forecast(fit_monthly, h = nrow(test_monthly))
autoplot(pred_monthly)
pred_df_monthly <- data.table(
  time     = index(test_monthly),
  test_val = test_monthly[, 1],
  pred_val = pred_monthly$mean
)
d <- melt(pred_df_monthly, id.vars = "time")
p <- ggplot(d, aes(x = time, y = value, color = variable)) +
  geom_point(size = 1) +
  geom_line()
p + labs(title = "Zone 1 Power Consumption: monthly",
         x = "Time", y = "Watt Hours")
# Annual model: auto.arima on the full series. Note the original run's
# output shows it selected a non-seasonal ARIMA(3,1,1) — a seasonal period
# of 6*24*365 is far too long for auto.arima to fit — so this is effectively
# a non-seasonal baseline. (`TRUE` spelled out instead of `T`.)
fit_annual <- auto.arima(ts(df$Zone1, frequency = 6 * 24 * 365), seasonal = TRUE)
fit_annual

# Forecast the final day and compare with the daily test split.
# BUG FIX: `time` must index the daily test window; the original used
# index(df_xts) (every timestamp in the full series), so the 144 forecasts
# and test values were silently recycled across the whole year.
pred_annual <- forecast(fit_annual, h = nrow(test_daily))
pred_df_annual <- data.table(
  time     = index(test_daily),
  test_val = test_daily[, 1],
  pred_val = pred_annual$mean
)
d <- melt(pred_df_annual, id.vars = "time")
ggplot(d, aes(x = time, y = value, color = variable)) +
  geom_point(size = 1) +
  geom_line()
# Accuracy (sMAPE) for each horizon. Coerce both arguments to plain numeric
# vectors: the original mixed ts and xts objects, which made the arithmetic
# inside smape() emit 'Incompatible methods ("Ops.ts", "Ops.xts")' warnings.
# sMAPE is symmetric in its two arguments, so argument order does not change
# the value.
smape(as.numeric(pred_df_daily$pred_val),   as.numeric(test_daily[, 1]))
smape(as.numeric(pred_df_weekly$pred_val),  as.numeric(test_weekly[, 1]))
smape(as.numeric(pred_df_monthly$pred_val), as.numeric(test_monthly[, 1]))
smape(as.numeric(pred_annual$mean),         as.numeric(test_daily[, 1]))
# Dynamic regression: ARIMA errors with Temperature as an exogenous
# regressor. Hold out the final 30 days (6 * 24 * 30 = 4320 observations).
h_test <- 6 * 24 * 30
train  <- df[1:(nrow(df) - h_test), ]
test   <- df[(nrow(df) - h_test + 1):nrow(df), ]

temp_xreg <- as.matrix(train[, c("Temperature")])
lm_model  <- forecast::auto.arima(train$Zone1, xreg = temp_xreg)
# FIX: xreg passed to forecast() must be a matrix matching the one the model
# was fitted with; test[, c("Temperature")] on a data.frame is a bare vector.
lm_res <- forecast(lm_model, h = nrow(test),
                   xreg = as.matrix(test[, c("Temperature")]))$mean
lm_model

# BUG FIX: the comparison table must use the TEST timestamps; the original
# used train$DateTime, so data.table() recycled the 4320 test values and
# predictions across all training rows (the "recycled with remainder"
# warnings in the original run).
pred_lm_monthly <- data.table(
  time     = test$DateTime,
  test_val = test$Zone1,
  pred_val = lm_res
)
d <- melt(pred_lm_monthly, id.vars = "time")
p <- ggplot(d, aes(x = time, y = value, color = variable)) +
  geom_point(size = 1) +
  geom_line()
p + labs(title = "Zone 1 Power Consumption: monthly",
         x = "Time", y = "Watt Hours")

smape(as.numeric(lm_res), test$Zone1)